miχpods: MOM6
Contents
miχpods: MOM6#
test out hvplot
build LES catalog
bootstrap error on mean, median?
Try daily vs hourly
add TAO χpods
fix EUC max at 110
Questions:
N2 vs N2T
match time intervals
match vertical resolution and extent
restrict TAO S2 to ADCP depth range
restrict to top 200m.
References#
Warner & Moum (2019)
Setup#
# Notebook setup: load the watermark extension, import the analysis stack,
# and configure holoviews/dask/matplotlib/xarray global options.
%load_ext watermark
import datetime
import glob
import os
import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import holoviews as hv
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import xgcm
import pump
from pump import mixpods
# Use the bokeh backend for interactive holoviews/hvplot figures.
hv.notebook_extension('bokeh')
# Don't split large chunks when slicing; avoids chunk-explosion warnings.
dask.config.set({"array.slicing.split_large_chunks": False})
plt.rcParams["figure.dpi"] = 140
plt.style.use("bmh")
# Keep attrs through xarray operations; compact reprs for dask-backed data.
xr.set_options(keep_attrs=True, display_expand_data=False)
gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/" # MITgcm output directory
stationdirname = gcmdir
# Record package versions for reproducibility.
%watermark -iv
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask_jobqueue/core.py:20: FutureWarning: tmpfile is deprecated and will be removed in a future release. Please use dask.utils.tmpfile instead.
from distributed.utils import tmpfile
distributed: 2022.7.0
numpy : 1.22.4
xarray : 2022.6.0rc0
xgcm : 0.6.0
matplotlib : 3.5.2
holoviews : 1.14.9
sys : 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:04:59) [GCC 10.3.0]
dask : 2022.7.0
json : 2.0.9
flox : 0.5.9
hvplot : 0.8.0
pump : 0.1
cf_xarray : 0.7.4
dcpy : 0.1.dev385+g121534c
import ncar_jobqueue
# Tear down any dask client/cluster left over from a previous run of this
# cell: close the client first, then the cluster.
if "client" in locals():
client.close()
del client
if "cluster" in locals():
cluster.close()
#env = {"OMP_NUM_THREADS": "3", "NUMBA_NUM_THREADS": "3"}
# cluster = distributed.LocalCluster(
# n_workers=8,
# threads_per_worker=1,
# env=env
# )
if "cluster" in locals():
del cluster
#cluster = ncar_jobqueue.NCARCluster(
# project="NCGD0011",
# scheduler_options=dict(dashboard_address=":9797"),
#)
# cluster = dask_jobqueue.PBSCluster(
# cores=9, processes=9, memory="108GB", walltime="02:00:00", project="NCGD0043",
# env_extra=env,
# )
import dask_jobqueue
# One single-core, 12 GB PBS job per dask worker on the casper queue.
cluster = dask_jobqueue.PBSCluster(
cores=1, # The number of cores you want
memory='12GB', # Amount of memory
processes=1, # How many processes
queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
local_directory='$TMPDIR', # Use your local directory
resource_spec='select=1:ncpus=1:mem=12GB', # Specify resources
project='ncgd0011', # Input your project ID here
walltime='02:00:00', # Amount of wall time
interface='ib0', # Interface to use
)
# Autoscale between 2 and 12 jobs depending on load.
cluster.adapt(minimum_jobs=2, maximum_jobs=12)
client = distributed.Client(cluster)
# Make this client the default scheduler for dask collections.
dask.config.set(scheduler=client)
client
Client
Client-cea847b1-12a4-11ed-b1af-3cecef1b12d4
| Connection method: Cluster object | Cluster type: dask_jobqueue.PBSCluster |
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status |
Cluster Info
PBSCluster
26f18e26
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status | Workers: 0 |
| Total threads: 0 | Total memory: 0 B |
Scheduler Info
Scheduler
Scheduler-b8336077-ab87-41ef-8cff-8055f10b7b8b
| Comm: tcp://10.12.206.40:38925 | Workers: 0 |
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status | Total threads: 0 |
| Started: Just now | Total memory: 0 B |
Workers
Read data#
TAO#
# Read the gridded TAO mooring dataset (with ancillary variables) from zarr,
# subset to 0N, 140W from 1996 onwards.
tao_gridded = (
xr.open_dataset(
os.path.expanduser("~/work/pump/zarrs/tao-gridded-ancillary.zarr"), chunks="auto", engine="zarr"
)
.sel(longitude=-140, time=slice("1996", None))
)
# Mark depth as the vertical axis so cf-xarray can identify it.
tao_gridded["depth"].attrs["axis"] = "Z"
# Add derived fields (project helper; also attaches ONI-based ENSO info).
tao_gridded = mixpods.prepare(tao_gridded, oni=pump.obs.process_oni())
tao_gridded.u.cf.plot()
tao_gridded.eucmax.plot()
# Joint PDF of S2 vs N2T; drop extra vars first, persist on the cluster.
tao_gridded = (
tao_gridded.update({
"n2s2pdf": mixpods.pdf_N2S2(
tao_gridded[["S2", "N2T"]].drop_vars(["shallowest", "zeuc"])
).persist()
}
)
)
tao_gridded
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 58. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 139586. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:245: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
KeyboardInterrupt
# Precomputed seasonal percentiles of hourly TAO Ri; guess CF axis attributes.
tao_Ri = xr.load_dataarray("tao-hourly-Ri-seasonal-percentiles.nc").cf.guess_coord_axis()
MITgcm stations#
# Read 1/20-degree MITgcm station output and select near-equatorial TAO
# longitudes; then focus on 140W.
stations = pump.model.read_stations_20(stationdirname)
gcmeq = stations.sel(longitude=[-155.025, -140.025, -125.025, -110.025], method="nearest")
#enso = pump.obs.make_enso_mask()
#mitgcm["enso"] = enso.reindex(time=mitgcm.time.data, method="nearest")
#gcmeq["eucmax"] = pump.calc.get_euc_max(gcmeq.u)
# pump.calc.calc_reduced_shear(gcmeq)
#oni = pump.obs.process_oni()
#gcmeq["enso_transition"] = mixpods.make_enso_transition_mask(oni).reindex(time=gcmeq.time.data, method="nearest")
mitgcm = gcmeq.sel(longitude=-140.025, method="nearest")
# Grid metrics (drF/drC cell thicknesses) for vertical xgcm operations.
metrics = pump.model.read_metrics(stationdirname)
mitgcm_grid = xgcm.Grid(
metrics.sel(latitude=mitgcm.latitude, longitude=mitgcm.longitude, method="nearest"),
coords=({"Z": {"center": "depth", "outer": "RF"}, "Y": {"center": "latitude"}}),
metrics={"Z": ("drF", "drC")},
periodic=False,
boundary="fill",
fill_value=np.nan,
)
# Derived fields at the equator, restricted to the top 250 m.
mitgcm = mixpods.prepare(mitgcm, grid=mitgcm_grid, oni=pump.obs.process_oni()).sel(latitude=0, method="nearest").cf.sel(Z=slice(-250))
mitgcm = mitgcm.update({"n2s2pdf": mixpods.pdf_N2S2(mitgcm)}).persist(scheduler=client)
mitgcm
mitgcm.u.cf.plot()
mitgcm.eucmax.cf.plot()
[<matplotlib.lines.Line2D at 0x2b6b605e48b0>]
# N2-S2 joint PDF for the neutral ("none") ENSO transition phase.
mixpods.plot_n2s2pdf(mitgcm.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b6b121fe9e0>
MOM6 run : calculate ONI#
# MOM6 history output for the baseline run.
dirname = "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/hist"
static = xr.open_dataset(*glob.glob(f"{dirname}/*static*.nc"))
sfc = xr.open_mfdataset(
    sorted(glob.glob(f"{dirname}/*sfc*")),
    coords="minimal",
    data_vars="minimal",
    compat="override",
    use_cftime=True,
    parallel=True,
)
# Shift the model calendar (years starting 0001) forward by 1957 years;
# 365 days/year is exact for a noleap calendar.
sfc["time"] = sfc.time + datetime.timedelta(days=365 * 1957)
#sfc["time"] = sfc.time + xr.coding.cftime_offsets.YearBegin(1957)
# Attach static grid variables as coordinates. `drop_vars` replaces the
# deprecated `Dataset.drop` used previously.
sfc.coords.update(static.drop_vars("time"))
#sfc["tos"].attrs["coordinates"] = "geolon geolat"
sst = sfc.cf["sea_surface_temperature"]
sst.cf
Coordinates:
- CF Axes: * X: ['xh']
* Y: ['yh']
* T: ['time']
Z: n/a
- CF Coordinates: * longitude: ['xh']
* latitude: ['yh']
* time: ['time']
vertical: n/a
- Cell Measures: area: ['areacello']
volume: n/a
- Standard Names: cell_area: ['areacello']
- Bounds: n/a
# Calculate a monthly average sea surface temperature in the Nino 3.4 region (5°S-5°N, 170°W-120°W).
monthly_ = (
sst.cf.sel(latitude=slice(-5, 5), longitude=slice(-170, -120))
.cf.mean(["X", "Y"])
.resample(time="M")
.mean()
#.sel(time=slice("1960", "1999-12-31"))
#.reindex(
# time=xr.cftime_range("1955-01-01", monthly.time[-1].dt.strftime("%Y-%m-%d").data.item(), freq="MS")
#)
.load()
)
# Pad to a regular month-end axis starting 1956 (NaNs before the run start)
# and convert the model's calendar to standard gregorian datetimes.
monthly = (
monthly_
.reindex(
time=xr.cftime_range(
"1956-01-01",
monthly_.time[-1].dt.strftime("%Y-%m-%d").data.item(),
freq="M",
calendar=monthly_.time.dt.calendar,
)
)
.convert_calendar("gregorian", use_cftime=False)
)
#monthly = monthly_
monthly.plot()
[<matplotlib.lines.Line2D at 0x2b6b0ffe3880>]
# Oceanic Nino Index and ENSO-transition mask from the model Nino3.4 SST.
monthly.coords["oni"] = mixpods.calc_oni(monthly)
monthly.coords["enso_transition_mask"] = mixpods.make_enso_transition_mask(monthly.oni)
# NOTE(review): `oniobs` and `onimom6` are not defined anywhere in the
# visible notebook — presumably created in a cell that was removed.
# Confirm their definitions before rerunning this cell.
(
oniobs.hvplot.line(x="time", label="obs")
* onimom6.hvplot.line(x="time", label="MOM6")
).opts(ylabel="ONI [°C]")
oniobs.enso_transition_mask.plot()
onimom6.enso_transition_mask.plot(color='r')
[<matplotlib.lines.Line2D at 0x2b6d7fd20eb0>]
MOM6 sections#
import mom6_tools
import xgcm
from mom6_tools.sections import combine_nested, read_raw_files
from mom6_tools.wright_eos import wright_eos
def combine_variables_by_coords(dsets):
    """Merge a list of datasets, combining each variable across datasets by
    coordinates.

    For every variable present in *any* input dataset, gather that variable
    from the datasets that actually contain it (and that have a non-empty
    time dimension), combine by coordinates, and merge the results.

    Fixes two latent KeyErrors in the original: ``ds[var]`` failed when the
    per-file variable sets differed, and ``ds.sizes["time"]`` failed when a
    dataset had no time dimension at all.
    """
    import itertools

    import tqdm

    # Union of variable names across all inputs.
    all_vars = set(itertools.chain(*[ds.data_vars for ds in dsets]))
    merged = xr.merge(
        xr.combine_by_coords(
            [
                ds[var]
                for ds in dsets
                # Skip empty-time datasets and datasets lacking this variable.
                if ds.sizes.get("time", 0) > 0 and var in ds
            ],
            combine_attrs="override",
        )
        for var in tqdm.tqdm(all_vars)
    )
    return merged
import glob
# dirname = "/glade/scratch/gmarques/gmom.e23.GJRAv3.TL319_t061_zstar_N65.mixpods.001/run"
# Raw per-PE TAO section files (*.nc.NNNN) from the run directory at 140W.
dirname = "/glade/scratch/dcherian/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/run/"
files = sorted(glob.glob(f"{dirname}/*TAO*140W*.nc.*"))
lons = [140] # [90, 110, 140, 155, 170] # TODO: Add 125
dsets = read_raw_files(files, parallel=True)
# Combine the decomposed files into one merged dataset.
combined = combine_variables_by_coords(dsets)
#mom6tao = read_tao_sections(dirname)
def interp_to_center(ds):
    """Average u-point (``xq``) variables onto the nearest tracer (``xh``)
    point for a single-longitude mooring section.

    Hard-coded for this section's layout: ``xq`` indices 1 and 2 bracket the
    lone ``xh`` point, so their mean is the centered value.
    (Removed dead index-interleaving code and the unused ``toolz`` import
    from the original; ``.drop_vars`` replaces the deprecated ``.drop``.)
    """
    out = (
        ds
        # Average the two cell faces bracketing the tracer point.
        .isel(xq=[1, 2])
        .coarsen(xq=2)
        .mean()
    )
    # Align the coarsened xq coordinate with the (single) xh point.
    out = out.sel(xh=out.xq.data, method="nearest", tolerance=0.05)
    for var in out:
        if "xq" in out[var].dims:
            # Relabel face-averaged variables as center variables; drop the
            # conflicting xh coordinate so `out`'s own xh is used.
            out[var] = out[var].rename({"xq": "xh"}).drop_vars("xh")
    return out.drop_vars("xq")
# Move u-point variables to tracer points; chunk contiguously in Y.
mom6tao = interp_to_center(combined).cf.chunk({"Y": -1})
# Potential density (reference pressure 0) from the Wright equation of state.
mom6tao["dens"] = wright_eos(mom6tao.thetao, mom6tao.so, 0)
mom6tao["dens"].attrs.update(
{"units": "kg/m^3", "standard_name": "sea_water_potential_density"}
)
mom6tao
100%|██████████| 21/21 [02:21<00:00, 6.76s/it]
<xarray.Dataset>
Dimensions: (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
yq: 49)
Coordinates:
* time (time) object 0001-01-06 00:30:00 ... 0056-01-05 23:30:00
* yh (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
* xh (xh) float64 -140.0
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
* zl (zl) float64 1.25 3.75 6.25 ... 5.627e+03 5.876e+03
* nv (nv) float64 1.0 2.0
* yq (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
Data variables: (12/22)
average_T2 (time) object dask.array<chunksize=(8640,), meta=np.ndarray>
KPP_kheat (time, zi, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
volcello (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
net_heat_surface (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
Kv_u (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
time_bnds (time, nv) timedelta64[ns] dask.array<chunksize=(8640, 2), meta=np.ndarray>
... ...
zos (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
KPP_OBLdepth (time, yh, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
vo (time, zl, yq, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
average_DT (time) timedelta64[ns] dask.array<chunksize=(8640,), meta=np.ndarray>
tauy (time, yq, xh) float32 dask.array<chunksize=(8640, 49, 1), meta=np.ndarray>
dens (time, zl, yh, xh) float32 dask.array<chunksize=(8640, 40, 49, 1), meta=np.ndarray>
Save#
# Persist the combined mooring section to zarr, rechunked to ~1-year blocks.
# (Dropped the stray f-prefix: the path contains no placeholders.)
mom6tao.chunk({"time": 24 * 365}).to_zarr(
    "/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
    mode="w",
    consolidated=True,
)
<xarray.backends.zarr.ZarrStore at 0x2b6dbf987d80>
Read sections#
# Reload the saved sections; drop averaging-bound variables and rechunk.
mom6tao = (
xr.open_dataset(
"/glade/scratch/dcherian/archive/gmom.e23.GJRAv3.TL319_t061_zstar_N65.baseline.001.mixpods/ocn/moorings/tao.zarr",
engine="zarr"
)
.drop_vars(["average_T1", "average_T2"])
.chunk("auto")
)
# Convert the cftime index to a standard DatetimeIndex (emits a
# calendar-conversion warning for the non-standard calendar).
mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
from mom6_tools.wright_eos import wright_eos
# Density from temperature alone (S fixed at 35) to isolate thermal effects.
mom6tao["densT"] = wright_eos(mom6tao.thetao, 35, 0)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/coding/times.py:682: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range
dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/coding/times.py:360: FutureWarning: Index.ravel returning ndarray is deprecated; in a future version this will return a view on self.
sample = dates.ravel()[0]
/glade/scratch/dcherian/tmp/ipykernel_110208/2270305083.py:9: RuntimeWarning: Converting a CFTimeIndex with dates from a non-standard calendar, 'noleap', to a pandas.DatetimeIndex, which uses dates from the standard calendar. This may lead to subtle errors in operations that depend on the length of time between dates.
mom6tao["time"] = mom6tao.indexes["time"].to_datetimeindex()
# xgcm grid for the section: zi = interfaces, zl = layer centers;
# layer thickness "h" is registered as the vertical metric.
grid = xgcm.Grid(
mom6tao,
coords={"Z": {"outer": "zi", "center": "zl"},
"X": {"center": "xh"},
"Y": {"center": "yh", "left": "yq"},
},
periodic=False,
boundary="fill",
fill_value=np.nan,
metrics = {("Z",): "h"},
)
grid
<xgcm.Grid>
Z Axis (not periodic, boundary='fill'):
* outer zi --> center
* center zl --> outer
X Axis (not periodic, boundary='fill'):
* center xh
Y Axis (not periodic, boundary='fill'):
* center yh --> left
* left yq --> center
# Add derived fields (S2, N2T, shred2, Rig_T, eucmax, ENSO masks) using the
# shared helper and the monthly ONI series computed above.
mom6tao = mixpods.prepare(mom6tao, grid, monthly)
mom6tao
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yh', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yh', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xgcm/grid.py:1471: UserWarning: Metric at ('time', 'zi', 'yq', 'xh') being interpolated from metrics at dimensions ('time', 'zl', 'yh', 'xh'). Boundary value set to 'extend'.
warnings.warn(
<xarray.Dataset>
Dimensions: (time: 480600, yh: 49, xh: 1, zi: 66, zl: 65, nv: 2,
yq: 49)
Coordinates:
* nv (nv) float64 1.0 2.0
* time (time) datetime64[ns] 1958-01-06T00:30:00 ... 2013-01-0...
* xh (xh) float64 -140.0
* yh (yh) float64 -5.911 -5.664 -5.417 ... 5.541 5.788 6.035
* yq (yq) float64 -6.035 -5.788 -5.541 ... 5.417 5.664 5.911
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 5.503e+03 5.751e+03 6e+03
* zl (zl) float64 1.25 3.75 6.25 ... 5.627e+03 5.876e+03
oni (time) float32 nan nan nan nan nan ... nan nan nan nan nan
warm_mask (time) bool True True True True ... True True True True
cool_mask (time) bool False False False False ... False False False
enso_transition (time) <U12 '____________' ... '____________'
Data variables: (12/26)
KPP_OBLdepth (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
KPP_buoyFlux (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
KPP_kheat (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
Kd_heat (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 66, 49, 1), meta=np.ndarray>
Kv_u (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
SW (time, yh, xh) float32 dask.array<chunksize=(480600, 49, 1), meta=np.ndarray>
... ...
densT (time, zl, yh, xh) float32 dask.array<chunksize=(9612, 65, 49, 1), meta=np.ndarray>
N2T (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 49, 1), meta=np.ndarray>
S2 (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
shred2 (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
Rig_T (time, zi, yh, xh) float32 dask.array<chunksize=(9612, 1, 1, 1), meta=np.ndarray>
eucmax (time, xh) float64 dask.array<chunksize=(9612, 1), meta=np.ndarray>
mom6140 = mom6tao.cf.sel(longitude=-140, latitude=0, method="nearest").cf.sel(Z=slice(150))
# Joint N2-S2 PDF for the 0N, 140W MOM6 section; then load the comparison
# variables into memory.
mom6140 = mom6140.update(
{"n2s2pdf": mixpods.pdf_N2S2(mom6140[["S2", "N2T", "eucmax"]])}
)
mom6140[["uo", "eucmax", "n2s2pdf", "S2", "N2T"]].load()
<xarray.Dataset>
Dimensions: (time: 480600, zl: 21, N2T_bins: 29, S2_bins: 29,
enso_transition_phase: 7, zi: 22)
Coordinates: (12/14)
* time (time) datetime64[ns] 1958-01-06T00:30:00 ... 2013...
xh float64 -140.0
yh float64 0.0625
yq float64 -0.0625
* zi (zi) float64 0.0 2.5 5.0 7.5 ... 120.7 133.7 147.6
* zl (zl) float64 1.25 3.75 6.25 ... 114.5 127.2 140.6
... ...
cool_mask (time) bool False False False ... False False False
enso_transition (time) <U12 '____________' ... '____________'
* N2T_bins (N2T_bins) object (-5.0, -4.896551724137931] ... (...
* S2_bins (S2_bins) object (-5.0, -4.896551724137931] ... (-...
* enso_transition_phase (enso_transition_phase) object 'none' ... 'all'
bin_areas (N2T_bins, S2_bins) float64 0.0107 0.0107 ... 0.0107
Data variables:
uo (time, zl) float32 -0.4169 -0.3548 ... 0.9186 0.8345
eucmax (time) float64 114.5 114.5 114.5 ... 127.2 127.2
n2s2pdf (N2T_bins, S2_bins, enso_transition_phase) float64 ...
S2 (time, zi) float32 nan 0.000642 ... 6.569e-05
N2T (time, zi) float32 nan 0.0001751 ... 0.0001191
mom6140.uo.cf.plot(robust=True)
# EUC-maximum depth time series at 140W.
mom6140.eucmax.cf.plot()
[<matplotlib.lines.Line2D at 0x2b6ae1ca6fb0>]
# N2-S2 joint PDF for the neutral ("none") ENSO transition phase.
mixpods.plot_n2s2pdf(mom6140.n2s2pdf.sel(enso_transition_phase="none"))
<matplotlib.contour.QuadContourSet at 0x2b6b61cf1c00>
Pick simulations#
# Collect the three products for side-by-side comparison plots.
datasets = {"TAO": tao_gridded, "MITgcm": mitgcm, "MOM6": mom6140}
Compare EUC maximum#
mixpods.plot_eucmax_timeseries(datasets, obs="TAO")